Subgroup Identification Analysis

GBSG Example

Author

Larry Leon

Code
# Global knitr chunk options: show the code, hide warnings and messages,
# centre figures and render them at 2x (retina) resolution.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = "center",
  fig.retina = 2
)
# The workspace is deliberately not cleared with rm(list = ls()): that call
# does not reset attached packages or options, and it deletes an interactive
# user's objects. Render in a fresh R session to get a clean state.
library(tinytex)
Warning: package 'tinytex' was built under R version 4.5.2
Code
library(ggplot2)

#library(table1)

library(gt)

library(survival)
library(data.table)
library(randomForest)
library(grf)
library(policytree)
library(DiagrammeR)

#library(grid)
#library(forestploter)
#library(randomizr)

# library(devtools)
# install_github("larry-leon/weightedsurv", force = TRUE)
#install.packages("weightedsurv")
# install_github("larry-leon/forestsearch", force = TRUE)

library(forestsearch)
library(weightedsurv)

# Set the default ggplot2 theme for all subsequent plots:
# minimal theme with a 12-point base font size
theme_set(theme_minimal(base_size = 12))

1 Summary

Reproducing main GBSG analysis

1.1 Data setup

Code
# Analysis data: the `gbsg` data set plus derived columns.
# NOTE(review): `gbsg` is taken from whichever attached package supplies it
# (presumably survival) -- confirm.
df.analysis <- within(gbsg, {
  # Sequential subject identifier (numeric); seq_len() is safe for 0 rows
  id <- as.numeric(seq_len(nrow(gbsg)))
  # Follow-up time converted from days to months (365.25 / 12 = 30.4375)
  time_months <- rfstime / 30.4375
  # Indicator (1/0) for tumour grade 3
  grade3 <- ifelse(grade == "3", 1, 0)
  # Copy of the hormonal-therapy indicator under a generic name
  treat <- hormon
})

# Column names handed to the analysis functions below
confounders.name <- c("age", "meno", "size", "grade3", "nodes", "pgr", "er")
outcome.name <- "time_months"
event.name <- "status"
id.name <- "id"
treat.name <- "hormon"

1.2 Kaplan-Meier curves and baseline summary

Code
# Prepare the data set consumed by plot_weighted_km(); the outcome, event and
# treatment columns come from the *.name variables.
# NOTE(review): by.risk = 6 presumably spaces the risk-table entries every
# 6 months -- confirm against the weightedsurv documentation.
dfcount <- df_counting(
  df = df.analysis,
  tte.name = outcome.name,
  event.name = event.name,
  treat.name = treat.name,
  by.risk = 6
)

# Kaplan-Meier curves by arm with confidence intervals and the log-rank
# result; the remaining arguments tune the y-axis limit and the placement of
# the median annotation.
plot_weighted_km(
  dfcount,
  conf.int = TRUE,
  show.logrank = TRUE,
  ymax = 1.05,
  xmed.fraction = 0.775,
  ymed.offset = 0.125
)

Code
# Baseline characteristics by treatment arm: continuous variables are
# summarised as mean (SD), categorical ones as counts (%), with p-values and
# standardized mean differences.
create_summary_table(
  data = df.analysis,
  treat_var = treat.name,
  table_title = "GBSG Characteristics by Treatment Arm",
  vars_continuous = c("age", "nodes", "size", "er", "pgr"),
  vars_categorical = c("grade", "grade3"),
  font_size = 12
)
GBSG Characteristics by Treatment Arm
Characteristic Control (n=440) Treatment (n=246) P-value1 SMD2
age Mean (SD) 51.1 (10.0) 56.6 (9.4) <0.001 0.57
nodes Mean (SD) 4.9 (5.6) 5.1 (5.3) 0.665 0.03
size Mean (SD) 29.6 (14.4) 28.8 (14.1) 0.470 0.06
er Mean (SD) 79.7 (124.2) 125.8 (191.1) <0.001 0.30
pgr Mean (SD) 102.0 (170.0) 124.3 (249.7) 0.213 0.11
grade 0.273 0.06
1 48 (10.9%) 33 (13.4%)
2 281 (63.9%) 163 (66.3%)
3 111 (25.2%) 50 (20.3%)
grade3 0.174 0.05
0 329 (74.8%) 196 (79.7%)
1 111 (25.2%) 50 (20.3%)
1 P-values: t-test for continuous, chi-square/Fisher's exact for categorical/binary variables
2 SMD = Standardized mean difference (Cohen's d for continuous, Cramer's V for categorical)

1.3 GRF analysis

Code
## GRF
# Generalized-random-forest subgroup search on the analysis data, with trees
# up to depth 2 and details printed.
# NOTE(review): n.min, dmin.grf and frac.tau are tuning constants whose exact
# meaning is defined by forestsearch -- confirm before changing them.
grf_est1 <- grf.subg.harm.survival(
  data = df.analysis,
  confounders.name = confounders.name,
  outcome.name = outcome.name,
  event.name = event.name,
  id.name = id.name,
  treat.name = treat.name,
  maxdepth = 2,
  n.min = 60,
  dmin.grf = 12,
  frac.tau = 0.6,
  details = TRUE
)
tau, maxdepth = 46.75811 2 
   leaf.node control.mean control.size control.se depth
1          2         6.49        82.00       3.34     1
2          3        -4.10       604.00       1.06     1
11         4        -7.90       112.00       2.81     2
21         5         3.86       177.00       1.87     2
4          7        -5.89       356.00       1.33     2

Selected subgroup:
  leaf.node control.mean control.size control.se depth
1         2         6.49        82.00       3.34     1

GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"

All splits:
[1] "er <= 0"   "age <= 50" "age <= 43"
Code
# NOTE: In general for GRF trees
# leaf1 --> recommend control
# leaf2 --> recommend treatment
# Tree depth 1
# Plot the depth-1 tree stored in the GRF result, labelling the two leaves
# according to the convention above
plot(grf_est1$tree1,leaf.labels=c("Control","Treat"))
Code
# Tree depth 2
# Plot the depth-2 tree stored in the GRF result with the same leaf labels
# (leaf1 = recommend control, leaf2 = recommend treatment)
plot(grf_est1$tree2,leaf.labels=c("Control","Treat"))

1.4 Forestsearch with depth=2 (maxk = 2)

Code
# Setup parallel processing: foreach backend via futures, with doRNG for
# parallel-safe random number streams.
# NOTE(review): no seed is set, so results may differ between runs --
# consider fixing one if exact reproducibility is required.
library(doFuture)
library(doRNG)

registerDoFuture()
registerDoRNG()

# ForestSearch with up to two-factor subgroups (maxk = 2). Column names come
# from the *.name variables so this call stays in step with the GRF analysis
# above. system.time() reports the elapsed time of the search.
system.time({
  fs <- forestsearch(
    df.analysis,
    confounders.name = confounders.name,
    outcome.name = outcome.name,
    treat.name = treat.name,
    event.name = event.name,
    id.name = id.name,
    potentialOutcome.name = NULL,
    df.test = NULL,
    flag_harm.name = NULL,
    # Subgroup selection criteria: hazard-ratio and consistency thresholds
    hr.threshold = 1.25,
    hr.consistency = 1.0,
    pconsistency.threshold = 0.90,
    sg_focus = "hr",
    max_subgroups_search = 30,
    use_twostage = TRUE,
    showten_subgroups = TRUE,
    details = TRUE,
    conf_force = NULL,
    # Candidate cuts: default cuts plus GRF- and LASSO-based selection
    cut_type = "default",
    use_grf = TRUE,
    plot.grf = TRUE,
    use_lasso = TRUE,
    maxk = 2,
    fs.splits = 1000,
    # Minimum subgroup size and minimum events per arm
    n.min = 60,
    d0.min = 10,
    d1.min = 10,
    plot.sg = TRUE,
    by.risk = 6,
    # NOTE(review): 30 workers is machine-specific -- adjust to available cores
    parallel_args = list(plan = "callr", workers = 30, show_message = TRUE)
  )
})

=== Two-Stage Consistency Evaluation Enabled ===
Stage 1 screening splits: 30 
Maximum total splits: 1000 
Batch size: 20 
================================================

GRF stage for cut selection with dmin, tau = 12 0.6 
tau, maxdepth = 46.75811 2 
   leaf.node control.mean control.size control.se depth
1          2         6.49        82.00       3.34     1
2          3        -4.10       604.00       1.06     1
11         4        -7.90       112.00       2.81     2
21         5         3.86       177.00       1.87     2
4          7        -5.89       356.00       1.33     2

Selected subgroup:
  leaf.node control.mean control.size control.se depth
1         2         6.49        82.00       3.34     1

GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"

All splits:
[1] "er <= 0"   "age <= 50" "age <= 43"
GRF cuts identified: 3 
  Cuts: er <= 0, age <= 50, age <= 43 
# of continuous/categorical characteristics 5 2 
Continuous characteristics: age size nodes pgr er 
Categorical characteristics: meno grade3 
## Prior to lasso: age size nodes pgr er 
#### Lasso selection results 
7 x 1 sparse Matrix of class "dgCMatrix"
                 s0
age     .          
meno    .          
size    0.005433435
grade3  0.178139021
nodes   0.049670523
pgr    -0.001812895
er      .          
Cox-LASSO selected: size grade3 nodes pgr 
Cox-LASSO not selected: age meno er 
### End Lasso selection 
## After lasso: size nodes pgr 
Default cuts included from Lasso: size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) nodes <= mean(nodes) nodes <= median(nodes) nodes <= qlow(nodes) nodes <= qhigh(nodes) pgr <= mean(pgr) pgr <= median(pgr) pgr <= qlow(pgr) pgr <= qhigh(pgr) 
Categorical after Lasso: grade3 
Factors per GRF: er <= 0 age <= 50 age <= 43 
Initial GRF cuts included er <= 0 age <= 50 age <= 43 
Factors included per GRF (not in lasso) er <= 0 age <= 50 age <= 43 

===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 16 cut expressions once and caching...
Cut evaluation summary:
  Total cuts:  16 
  Valid cuts:  16 
  Errors:  0 
✓ All 16 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====

# of candidate subgroup factors= 16 
 [1] "er <= 0"      "age <= 50"    "age <= 43"    "size <= 29.3" "size <= 25"  
 [6] "size <= 20"   "size <= 35"   "nodes <= 5"   "nodes <= 3"   "nodes <= 1"  
[11] "nodes <= 7"   "pgr <= 110"   "pgr <= 32.5"  "pgr <= 7"     "pgr <= 131.8"
[16] "grade3"      
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 528 
Events criteria: control >= 10 , treatment >= 10 
Subgroup search completed in 0.01 minutes
Found 13 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 13 
# of unique initial candidates: 13 
# Restricting to top stop_Kgroups = 30 
# of candidates to evaluate: 13 
Algorithm: Two-stage sequential 
  Stage 1 splits: 30 
  Screen threshold: 0.763 
  Max total splits: 1000 
  Batch size: 20 
Parallel processing: callr with 30 workers

*** Subgroup found: {er <= 0} {size <= 35} 
% consistency criteria met= 1 
SG focus= hr 
Subgroup Consistency Minutes= 0.046 
Algorithm used: Two-stage sequential 
Candidates evaluated: 13 
Candidates passed: 7 
Subgroup found (FS) with sg_focus='hr'
Selected subgroup: {er <= 0} & {size <= 35} 
Minutes forestsearch overall = 0.06 
Consistency algorithm used: twostage 
   user  system elapsed 
 19.568   1.593   3.791 
Code
# Switch the future plan back to sequential execution after the search
plan("sequential")

# Summary tables for the estimation (training) data; which_df = "est" is the
# default and is spelled out here for clarity
res_tabs <- sg_tables(
  fs,
  which_df = "est",
  ndecimals = 3
)

# Candidate subgroups with size, events, hazard ratio and consistency
res_tabs$sg10_out
Identified Subgroups
Two-factor subgroups (maxk=2)
Factor 1 Factor 2 N Events E1 HR Pcons
{er <= 0} {size <= 35} 61 34 15 2.537 1.000
{er <= 0} {nodes <= 7} 61 31 11 2.335 0.970
{er <= 0} !{age <= 43} 68 38 14 2.164 0.970
{er <= 0} 82 45 16 1.951 0.970
{er <= 0} !{size <= 20} 61 35 12 2.054 0.960
{er <= 0} {pgr <= 32.5} 75 41 16 2.222 0.950
{er <= 0} {pgr <= 7} 64 34 13 1.992 0.910
Search Configuration: Single-factor candidates (L) = 32; Maximum combinations evaluated = 528; Search depth (maxk) = 2
Search Results: Candidate subgroups found = 13; Maximum HR estimate = 2.54
Note: E1 = events in treatment arm; Pcons = consistency proportion
Code
# Treatment-effect estimates on the training data for the ITT population and
# the identified "Questionable" / "Recommend" groups
res_tabs$tab_estimates
Treatment Effect Estimates
Training data estimates
Subgroup n n1 events m1 m0 RMST HR (95% CI)
ITT 686 (100.0%) 246 (35.9%) 299 (43.6%) 66.3 50.2 7.8 0.69 (0.54, 0.89)
Questionable 61 (8.9%) 23 (37.7%) 34 (55.7%) 18.5 48 -19 2.54 (1.25, 5.17)
Recommend 625 (91.1%) 223 (35.7%) 265 (42.4%) 66.7 52.2 9.6 0.61 (0.47, 0.79)

1.5 Bootstrap Inference

Code
#output_dir <- "dev/vignettes-working/applications/gbsg/results"
# Directory for cached results; results are saved only if it already exists.
# No trailing slash: file.path() supplies the separator, which avoids paths
# such as "results//file.RData" and non-portable trailing separators.
output_dir <- "results"
save_results <- dir.exists(output_dir)
# File-name prefixes for the saved bootstrap and cross-validation results
fileout_boot <- "gbsg-k2_v3_hr_B=1000"
fileout_cv <- "gbsg-k2_v3_hr_CV=200"


# patchwork is needed for the combined bootstrap plot (without it the
# combined plot is not produced)
library(patchwork)

# Number of bootstrap samples
NB <- 1000

# Bootstrap bias correction of the ForestSearch estimates; system.time()
# reports the elapsed time of the NB bootstrap iterations
system.time({
  fs_bc <- forestsearch_bootstrap_dofuture(
    fs.est = fs,
    nb_boots = NB,
    show_three = FALSE,
    details = TRUE
  )
})
Ystar matrix generated should be 'boots x N': 1000 x 686

ForestSearch parameters for bootstrap iterations:
  - sg_focus: hr 
  - maxk: 2 
  - fs.splits: 1000 
  - max_subgroups_search: 30 
  - hr.threshold: 1.25 
  - hr.consistency: 1 
  - pconsistency.threshold: 0.9 
  - n.min: 60 
  - use_twostage: TRUE 
  - use_lasso: TRUE 
  - use_grf: TRUE 
  Bootstrap-specific overrides:
  - grf_res: NULL (forces re-selection)
  - grf_cuts: NULL (forces re-selection)
  - parallel_args: sequential (prevents nested parallelism)
  - details: FALSE (suppressed in workers)
  - plot.sg: FALSE
  - plot.grf: FALSE

=== Bootstrap Analysis Complete ===
Success rate: 87.2% (872/1000)

H (Questionable) Estimates:
  Unadjusted:       2.54 (1.25,5.17) 
  Bias-corrected:  1.89 (0.87,4.12) 

Hc (Recommend) Estimates:
  Unadjusted:       0.61 (0.47,0.79) 
  Bias-corrected:  0.63 (0.44,0.9) 
===================================
    user   system  elapsed 
9665.319  158.408  773.611 
Code
# Back to sequential execution once the bootstrap has finished
plan("sequential")

# Persist the analysis data, the ForestSearch fit and the bootstrap results,
# but only when the results directory was found earlier
if (save_results) {
  filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
  save(df.analysis, fs, fs_bc, file = filename)
  cat("\nResults saved to:", filename, "\n")
}

Results saved to: results//gbsg-k2_v3_hr_B=1000.RData 

1.5.1 Diagnostics and Summaries

Code
#load("~/Documents/GitHub/forestsearch/vignettes/results/sim_gbsg_example_B=1000.RData")

# Reload the cached bootstrap results when the file is actually present.
# Checking the file (not just the directory) avoids a load() error when the
# directory exists but the results were never saved.
output_dir <- "results"
filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
load_results <- file.exists(filename)
if (load_results) {
  load(file = filename)
}

# Summarise the bootstrap run on the hazard-ratio scale: prints the success
# and timing report and builds the tables and plots used below
summaries <- summarize_bootstrap_results(
  sgharm = fs$sg.harm,
  boot_results = fs_bc,
  create_plots = TRUE,
  est.scale = "hr"
)

===============================================================
           BOOTSTRAP ANALYSIS SUMMARY                          
===============================================================

BOOTSTRAP SUCCESS METRICS:
-------------------------------------------------------------
  Total iterations:              1000
  Successful subgroup ID:        872 (87.2%)
  Failed to find subgroup:       128 (12.8%)

TIMING ANALYSIS:
-------------------------------------------------------------
Overall:
  Total bootstrap time:          12.87 minutes (0.21 hours)
  Average per iteration:         0.01 min (0.8 sec)

Per-iteration timing:
  Mean:                          0.16 min (9.9 sec)
  Median:                        0.15 min (9.2 sec)
  Std Dev:                       0.10 minutes
  Range:                         [0.01, 0.59] minutes
  IQR:                           [0.09, 0.23] minutes

ForestSearch timing (successful iterations only):
  Iterations with FS:            1000 (100.0%)
  Mean FS time:                  0.16 min (9.9 sec)
  Median FS time:                0.15 min (9.2 sec)
  Total FS time:                 164.74 minutes
  FS time % of total:            1280.5%

Overhead timing (Cox models, bias correction, etc.):
  Mean overhead:                 0.00 min (0.0 sec)
  Median overhead:               0.00 min (0.0 sec)
  Total overhead:                0.19 minutes
  Overhead % of total:           1.5%

PERFORMANCE ASSESSMENT:
-------------------------------------------------------------
  Performance rating:            ✓✓✓ Excellent
  Average iteration speed:       0.8 seconds

===============================================================
Code
# Table of unadjusted and bootstrap bias-corrected hazard ratios by subgroup
sg_tab <- summaries$table

# Display the table
sg_tab
Treatment Effect by Subgroup
Bootstrap bias-corrected estimates (1000 iterations)
Subgroup
Sample Size
Survival
Treatment Effect
N NT Events MedT MedC RMSTd HR
(95% CI)1
HR
(95% CI)2
Qstnbl 61 (8.9%) 23 (37.7%) 34 (55.7%) 18.5 48 -19 2.54 (1.25, 5.17) 1.89 (0.87,4.12)
Recmnd 625 (91.1%) 223 (35.7%) 265 (42.4%) 66.7 52.2 9.6 0.61 (0.47, 0.79) 0.63 (0.44,0.9)
1 Unadjusted HR: Standard Cox regression hazard ratio with robust standard errors
2 Bias-corrected HR: Bootstrap-adjusted estimate using infinitesimal jacknife method (1000 iterations). Corrects for optimism in subgroup selection.
Note: Med = Median survival time (months). RMSTd = Restricted mean survival time difference. Subgroup identified in 87.2% of bootstrap samples.
Code
# Report how often an arm has fewer than `threshold` (here 12) events, both
# for the original subgroups on bootstrap samples and for newly found
# subgroups on the original data
event_summary <- summarize_bootstrap_events(fs_bc, threshold = 12)

=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 1000
Event threshold: <12 events

ORIGINAL Subgroup H on BOOTSTRAP samples:
  Control arm <12 events: 0 (0.0%)
  Treatment arm <12 events: 0 (0.0%)
  Either arm <12 events: 0 (0.0%)

ORIGINAL Subgroup Hc on BOOTSTRAP samples:
  Control arm <12 events: 0 (0.0%)
  Treatment arm <12 events: 0 (0.0%)
  Either arm <12 events: 0 (0.0%)

NEW Subgroups found: 872 (87.2%)

NEW Subgroup H* on ORIGINAL data:
  Control arm <12 events: 29 (3.3% of successful)
  Treatment arm <12 events: 68 (7.8% of successful)
  Either arm <12 events: 94 (10.8% of successful)

NEW Subgroup Hc* on ORIGINAL data:
  Control arm <12 events: 0 (0.0% of successful)
  Treatment arm <12 events: 0 (0.0% of successful)
  Either arm <12 events: 0 (0.0% of successful)
Code
# Bootstrap diagnostics table: success rate, bias-correction impact,
# bootstrap quality and search-performance metrics
summaries$diagnostics_table_gt
Bootstrap Diagnostics Summary
Analysis of 1000 bootstrap iterations
Category Metric Value
Success Rate1 Total iterations 1000
Successful subgroup ID 872 (87.2%)
Failed to find subgroup 128 (12.8%)
Success rating Good ✓✓
Subgroup H (Questionable) Unadjusted estimate 2.54 (1.25, 5.17)
Bias-corrected estimate 1.89 (0.87, 4.12)
Bias correction impact2 25.4%
CI width change3 3.92 -> 3.26
Subgroup Hc (Recommend) Unadjusted estimate 0.61 (0.47, 0.79)
Bias-corrected estimate 0.63 (0.44, 0.90)
Bias correction impact2 3.2%
CI width change3 0.32 -> 0.47
Bootstrap Quality: H Valid iterations 872
Mean (SD) 0.64 (0.50)
Coefficient of variation4 78.5%
Skewness5 -0.16
Bootstrap Quality: Hc Valid iterations 872
Mean (SD) -0.47 (0.21)
Coefficient of variation4 45.4%
Skewness5 0.15
Search Performance Mean max HR found 3.19 (1.27)
Mean factors evaluated 39.7
Mean combinations tried 838
Proportion at maxk --
1 Success Rate: Proportion of bootstrap samples where ForestSearch identified a valid subgroup
2 Bias Correction Impact: Percentage change from unadjusted to bias-corrected estimate
3 CI Width Change: Confidence interval width before -> after bias correction
4 Coefficient of Variation: Standard deviation as % of mean (lower is better)
5 Skewness: Measure of asymmetry (0 = symmetric, |skew| < 1 is generally good)
Interpretation Guide:

Good stability: Subgroup is reliably identified in most bootstrap samples.

High variability: Bootstrap estimates are imprecise (CV >= 25%). Consider increasing nb_boots or sample size.

Code
# How often the subgroup found in a bootstrap sample exactly matches the
# subgroup identified on the original data
summaries$subgroup_summary$original_agreement
                            Metric       Value
                            <char>      <char>
1:      Total bootstrap iterations        1000
2:           Successful iterations         872
3: Failed iterations (no subgroup)         128
4:       Exact match with original 136 (15.6%)
5:         Different from original 736 (84.4%)
Code
# Count and percentage of bootstrap subgroups involving each base factor,
# ranked by frequency
summaries$subgroup_summary$factor_presence
  Rank Factor Count   Percent
2    1     er   475 54.472477
6    2    pgr   384 44.036697
7    3   size   262 30.045872
1    4    age   231 26.490826
3    5 grade3   154 17.660550
5    6  nodes   146 16.743119
4    7   meno    67  7.683486
Code
# Same frequency summary at the level of specific factor definitions
# (e.g. a particular cut point) rather than base factors
summaries$subgroup_summary$factor_presence_specific
    Rank Base_Factor Factor_Definition Count  Percent
124    1          er         {er <= 0}   288 33.02752
147    2      grade3          {grade3}   152 17.43119
Code
# Display the combined bootstrap plot
# NOTE(review): presumably assembled with patchwork, which is loaded before
# the bootstrap run for this purpose -- confirm
summaries$plots$combined

1.6 Forest Search n-fold cross-validation

Code
# Reload the cached bootstrap results when the file is actually present.
# Checking the file (not just the directory) avoids a load() error when the
# directory exists but the results were never saved.
output_dir <- "results"
filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
load_results <- file.exists(filename)
if (load_results) {
  load(file = filename)
}

# Kfolds = n (default to n-fold cross-validations)

# Start from NULL so a stale fs_OOB is never carried over
fs_OOB <- NULL

# n-fold (leave-one-out) cross-validation of the ForestSearch analysis
# NOTE(review): 36 workers is machine-specific -- adjust to available cores
fs_OOB <- forestsearch_Kfold(
  fs.est = fs,
  details = TRUE,
  parallel_args = list(plan = "callr", workers = 36, show_message = TRUE)
)
Cross-validation setup:
  - Observations: 686 
  - Folds: 686 
  - Fold sizes (range): 1-1 

ForestSearch parameters for CV folds:
  - sg_focus: hr 
  - maxk: 2 
  - fs.splits: 1000 
  - max_subgroups_search: 30 
  - hr.threshold: 1.25 
  - hr.consistency: 1 
  - pconsistency.threshold: 0.9 
  - n.min: 60 
  - use_twostage: TRUE 
  - use_lasso: TRUE 
  - use_grf: TRUE 
  - (per-fold parallel: sequential)
  - (per-fold details: FALSE)
  - (per-fold plot.sg: FALSE)

Cross-validation complete:
  - Time: 4.82 minutes
  - Subgroup found in 97.8 % of folds
Any found: 0.9781341 
Exact match: 0.8921283 
At least 1 match: 0.9781341 
Cov 1 any: 0.9781341 
Cov 2 any: 0.8921283 
Cov 1 and 2 any: 0.8921283 
Cov 1 exact: 0.9781341 
Cov 2 exact: 0.8921283 
Agreement (sens, ppv) in H and Hc: 0.704918 0.9856 0.8269231 0.9716088 
Code
# Return to a single sequential worker after cross-validation
plan(sequential)

# Summarise the n-fold cross-validation: agreement metrics plus a table that
# compares full-analysis and cross-validated subgroup estimates
summary_OOB <- forestsearch_KfoldOut(
  res = fs_OOB,
  outall = TRUE,
  details = TRUE
)
Any found: 0.9781341 
Exact match: 0.8921283 
At least 1 match: 0.9781341 
Cov 1 any: 0.9781341 
Cov 2 any: 0.8921283 
Cov 1 and 2 any: 0.8921283 
Cov 1 exact: 0.9781341 
Cov 2 exact: 0.8921283 
Agreement (sens, ppv) in H and Hc: 0.704918 0.9856 0.8269231 0.9716088 
        Subgroup        n              n1            m1     m0     RMST 
Overall "ITT"           "686 (100.0%)" "246 (35.9%)" "66.3" "50.2" "7.8"
FA_0    "Not recommend" "61 (8.9%)"    "23 (37.7%)"  "18.5" "48"   "-19"
KfA_0   "Not recommend" "52 (7.6%)"    "14 (26.9%)"  NA     "42.9" "9.3"
FA_1    "Recommend"     "625 (91.1%)"  "223 (35.7%)" "66.7" "52.2" "9.6"
KfA_1   "Recommend"     "634 (92.4%)"  "232 (36.6%)" "66.3" "55"   "6.7"
        Hazard ratio       
Overall "0.69 (0.54, 0.89)"
FA_0    "2.54 (1.25, 5.17)"
KfA_0   "0.60 (0.22, 1.64)"
FA_1    "0.61 (0.47, 0.79)"
KfA_1   "0.71 (0.55, 0.91)"
Code
# Frequency of each factor in the first column of the subgroups found
# across the cross-validation folds
table(summary_OOB$SGs_found[,1])

!{age <= 43}    {er <= 0} 
           5          666 
Code
# Frequency of each factor in the second column of the subgroups found
# across the cross-validation folds
table(summary_OOB$SGs_found[,2])

!{age <= 43}    {er <= 0} {nodes <= 7}  {pgr <= 32}  {pgr <= 33}   {pgr <= 7} 
           3            5            3            4           36            1 
{size <= 35} 
         612 
Code
# Number of repetitions of the 10-fold cross-validation
Ksims <- 200

# Repeated 10-fold cross-validation of the ForestSearch analysis.
# NOTE(review): 36 workers are requested but the run log reports 13, so the
# request appears to be capped -- confirm.
fs_ten <- forestsearch_tenfold(
  fs.est = fs,
  sims = Ksims,
  Kfolds = 10,
  details = TRUE,
  parallel_args = list(plan = "callr", workers = 36, show_message = TRUE)
)
Starting repeated K-fold cross-validation:
  - Simulations: 200 
  - Folds per simulation: 10 
  - Workers: 13 

ForestSearch parameters for CV folds:
  - sg_focus: hr 
  - maxk: 2 
  - fs.splits: 1000 
  - max_subgroups_search: 30 
  - hr.threshold: 1.25 
  - hr.consistency: 1 
  - pconsistency.threshold: 0.9 
  - n.min: 60 
  - use_twostage: TRUE 
  - use_lasso: TRUE 
  - use_grf: TRUE 
  - (per-fold parallel: sequential)
  - (per-fold details: FALSE)
  - (per-fold plot.sg: FALSE)

Repeated K-fold CV complete:
  - Time: 12.39 minutes
  - Successful simulations: 200 / 200 
  - Projected hours per 100 sims: 0.1 
Code
# Reset the future plan to a single sequential worker
plan(sequential)

# Summary of subgroup-finding metrics (any found, exact match, per-covariate
# agreement) over the repeated 10-fold cross-validation
print(fs_ten$find_summary)
       Any      Exact At least 1       Cov1       Cov2  Cov 1 & 2 Cov1 exact 
       0.7        0.0        0.5        0.5        0.0        0.0        0.5 
Cov2 exact 
       0.0 
Code
# Summary of sensitivity and positive predictive value for H and Hc over the
# repeated 10-fold cross-validation
print(fs_ten$sens_summary)
   sens_H   sens_Hc     ppv_H    ppv_Hc 
0.4918033 0.9664000 0.5763403 0.9506173 
Code
# First rows of the sensitivity / PPV values (one row per simulation,
# presumably -- confirm)
print(head(fs_ten$sens_out))
        sens_H sens_Hc     ppv_H    ppv_Hc
[1,] 0.4590164  0.9680 0.5833333 0.9482759
[2,] 0.5409836  0.9632 0.5892857 0.9555556
[3,] 0.6229508  0.9424 0.5135135 0.9624183
[4,] 0.5245902  0.9600 0.5614035 0.9538951
[5,] 0.3442623  0.9792 0.6176471 0.9386503
[6,] 0.4098361  0.9632 0.5208333 0.9435737
Code
# First rows of the subgroup-finding metrics (one row per simulation,
# presumably -- confirm)
print(head(fs_ten$find_out))
     Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact Cov2 exact
[1,] 0.6     0        0.5  0.5    0         0        0.5          0
[2,] 0.7     0        0.5  0.5    0         0        0.5          0
[3,] 0.9     0        0.6  0.6    0         0        0.6          0
[4,] 0.7     0        0.5  0.5    0         0        0.5          0
[5,] 0.5     0        0.4  0.4    0         0        0.4          0
[6,] 0.7     0        0.4  0.4    0         0        0.4          0
Code
# Save all results (analysis data, ForestSearch fit, bootstrap and both
# cross-validation results), but only if the results directory exists.
# No trailing slash: file.path() supplies the separator, which avoids paths
# such as "results//file.RData".
output_dir <- "results"
save_results <- dir.exists(output_dir)

if (save_results) {
  filename <- file.path(output_dir, paste0(fileout_cv, ".RData"))
  save(df.analysis, fs, fs_bc, fs_ten, fs_OOB, file = filename)
  cat("\nResults saved to:", filename, "\n")
}

Results saved to: results//gbsg-k2_v3_hr_CV=200.RData 
Code
# Reload the cached cross-validation results when the file is actually
# present. Checking the file (not just the directory) avoids a load() error
# when the directory exists but the results were never saved.
output_dir <- "results"
filename <- file.path(output_dir, paste0(fileout_cv, ".RData"))
load_results <- file.exists(filename)
if (load_results) {
  load(file = filename)
}

# Define the reference subgroups to display alongside the identified ones.
# Each entry gives the subsetting expression, its display name and its type.
subgroups <- list(
  age_gt65 = list(
    subset_expr = "age > 65",
    name = "age > 65",
    type = "reference"
  ),
  age_lt65 = list(
    subset_expr = "age <= 65",
    name = "age <= 65",
    type = "reference"
  ),
  pgr_positive = list(
    subset_expr = "pgr > 0",
    name = "pgr > 0",
    type = "reference"
  ),
  pgr_negative = list(
    subset_expr = "pgr <= 0",
    name = "pgr <= 0",
    type = "reference"
  )
)

# Create the forest plot from the ForestSearch fit, the bootstrap results and
# both cross-validation results. Column names come from the *.name variables
# so they stay consistent with the earlier analysis calls.
result <- plot_subgroup_results_forestplot(
  fs_results = list(fs.est = fs, fs_bc = fs_bc, fs_OOB = fs_OOB, fs_kfold = fs_ten),
  df_analysis = df.analysis,
  subgroup_list = subgroups,
  outcome.name = outcome.name,
  event.name = event.name,
  treat.name = treat.name,
  # Display labels for the experimental and control arms
  E.name = "Hormon",
  C.name = "CT",
  ci_column_spaces = 25
)

# Display the plot
plot(result$plot)